{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "54a609477af23a56",
   "metadata": {},
   "source": [
    "## Chunks（chunk shape）"
   ]
  },
  {
   "attachments": {
    "e7183a3b-57d2-4a0c-8c69-d575e063fcf2.png": {
     "image/png": "iVBORw0KGgoAAAANSUhEUgAAAg8AAAD9CAYAAAA74JSZAAAgAElEQVR4Ae2dD5BU1Z3v7yCCJP6LRh9RUTdLZbOaPyYSXqI+E2MM0SJWOYbE+GLcVUsfpZWNz1qLWooqSuGVZZ6xgKHm4bIkhgwhujA4xR/HNpmg48wgRHcCGtANkaCii/H/MDgj/F7d7r7dt7vvTJ8z3O57frc/VE110/3re8/5/c4538/9nXPP9YR/eAAP4AE8gAfwAB6w8IC3bds2yWQe5y/FPvBjXO1fX9+WxNpAf39/teJJb29fYuVLqn9s3769ql+2bqX/JhWfIz3vjh07qsa3u/up1LT7Xbt2Va0vBno84GUyGVm0aC1/KfXB4sVrxY/xaP+Gh4els/OxRNrAkiXt0tXVNVrxZGhoSDo7G6udtrS0y+bNm0f1i//lpk2dicSNMePIxszW1nVSDeoHBgZkw4Z0xNevb29vb9X2jIEeDxTgoaNjn/CXPh/YwMPixY/UtQ2sW7dXbOBhyZKOupYvqf7Q3r5HbOEhqbJyXvsx46GHdokNPCxdulF1u1+zZne2vsCDHjAwKSnwkHJoAh7sB/ekBRF40BczmzYDPJhIEzauewB4AB4kmLYg8+CGaAEPbsTBBghsbIEH12WR8pl4AHgAHoAHx9oA8AA8+IN3sOaBaQsTKcOm3h4AHhwTDpsrGBNbpi30CRHwoC9mJn0xsCHzUG+ZS/B8Hx6U9wcG5NDhBMtQo1MDD8ADmQfH2gDwADz44z2ZhxqpXj0P+6dnpXfLFtn+/MtyqJ7nrcO5gAfHhCO4OonrlcyDPiECHvTFzKa/knmog7I5corhp1ql9YEH5eF1ffLaX9+WNCUggAfggcyDY20AeAAefO0j8+AIARxBMQY6rpGbZs+R+Yva5an+52Vw+AgO5thPgQfHhMPmCsbElsyDPiECHvTFzKQvBjZkHhxTwRoWZ7DjBpl2wQy5+H/dKQt+tlFe3DNQw7PV99DAA/BA5sGxNgA8AA++DJB5qK8Y1uJsgx23yJRTzpKTL7xQvnHbffJI1x55vxYnSuCYwINjwhFcncT1SuZBnxABD/piZtNfyTwkoHQJndKHh1PHjRfv+OPl5Iu/L//y8x55LiVTF8AD8EDmwbE2ADwAD77WkXlISPFjPK0PD6d4nnj+34lnyrfmLJdH96Rj2STw4Jhw2FzBmNiSedAnRMCDvpiZ9MXAhsxDjOrs+KFK4MFrkinf/idZ8bt35aDj5TYpHvAAPJB5cKwNAA/Agz94k3kwkTC3bUrhwZOPTL1M/rmtX15KQfIBeHBMOIKrk7heyTzoEyLgQV/MbPormQe3BT/O0pXDg3fMJ+W78x6R/hSsmgQegAcyD461AeABePAFjMxDnDKezLEq4MH7qFx01RJ56i/6Uw/Ag2PCYXMFY2JL5kGfEAEP+mJm0hcDGzIPyQh5EmethIcmOf1zt8qvn30vieLEek7gAXgg8+BYGwAegAd/lCfzEKvWJXKwSnjw5JjJM+Te3+5Vv1V1AR4WLVor/KXPBzaZhyTiv2RJu3R1dY3asYeGhqSzM9NQ7bOlpV02b948ql/8Lzdt6mwovyTRRmtxztbWdbJt27ZR4xvAQy3OX+9j+vXt7e0dtb5p/DIKHrwJn5abH35atO816fkNOJPJ8JdiH1QbpPxOu2XLlsTaQH9/f9Vxo6+vL7HyJdU/tm/fXtUv9F+9Y9eOHTuqxre7uzs17X7nzp1V65s2g0h48E6Sb9z7W3lXeWU95eWn+HgAD+ABPIAHnPSADw+nBptEFV7Hy9Sb18p/OVli80IBD+a+whIP4AE8gAfwgLEHfHiYXICG/E6TnicnzWiRrUPGh3HSEHhwMiwUCg/gATyAB7R7wH+q5iePm5DbnjoEERMumCedr+muHfCgO36UHg/gATyABxz1wOD662XaZ86SyeOLWQf/ORfjzr9FHn72kKOlNisW8GDmJ6zwA
B7AA3gAD1h54ODG+XLDNTPkqycfXZp9+MRV0vJ73Y/XBB6smgLGeAAP4AE8gAfMPDD0xINy/71zZPZ5Z8uE0LSFN/FLMq9b9/0WwINZG8AKD+ABPIAH8ICVBw490yWdnavlnu99U06fFJ66OE1uWf+i1bFcMwYeXIsI5cEDeAAP4IF0eODt1+WVl5+WNT+5Ub7yiTA8nCTXrtS970V+k6jHJZPhL60+MNkkautWf7OwZNqA6+XDL8m0i6T8Xq/zVtskyt9hcsuWpxPrl3H7oVp900EL5bU4LIeGX5E/dNwj//O8Y0LrHibJjPufLzdW9X+2p075ttwm21P7Lbaz87FEtjn2t6f2d3Cs9q8Rt6c28QvbU+vcUt5ke+o333xTNmxIx/bjJvWtNgZo/v7tzY/IHd8+TZoK6x6Oks/M3yGa77cowEPwxDde0/VQHlt4qHf8beGh3uVL6nz+sy1s4CGpcnJe+/HC9KmaATwsW/a4aPazaX01w0G1sh/6y3Oy9IfT5SMFePDkrNu7RfOzNYEHx56oGPcgATzYD+5xx2AsxwMedMbNJNamYppKeNj/vPS8+FY1rU3f90NvyaM//k7JbpOTb2mXvYprCjwAD9nmG0xbmAx+cdqQeYgWSeAh2i9xtr2kjtXQ8NC7QDxvsly7ardi2RxL0T+UXf/vB3J6KPNw6g0rgYekOiHnrT7Aknmo7iMX2xHwoDNuJm0JePDEm3yrbNg/FhHW+5u3H7tVPnNU8Y6LE65slT16qyNkHsg8ZJsvmQe3xAp4cCseJlBgagM85AT0ihWNlX348I93S/OxRXiYOO1+4MG002BX/wGRzEP9fR5HOwcedMbNJPbAQ15AZ7XJPsVX3rZFP/z8Mrn6uCI8NJ2zUP7T9iAO2ZN5IPOQbY5kHtwSK+DBrXiYQIGpDfAQCOgd8ptBh9Sw1kXZ2y7Xfiyouyfe1Lmyo9bnrOHxgQfgIdu8gAe3xAp4cCsepmBgYtfQ8LC/RxZ8e3J+s6Svy3LdOzTbSfOBbpl9ehge5sg2xc/GAh6AB+DBwTYAPAAPqbxV0x9t9rZJc/6ug7lP2umvausP+2TOF8YVd5mceqdkFG/0ADw4KBwmVy+mNqx50ClCwIPOuJn0y4bOPGTVf5+0zcpdgTevaqBVD4f75N7LJoTg4Q55SPEdJ8AD8JDtzkxbuCVWwINb8TCBAlMb4EHkmZ9+ISuiX3+gge64OPy0LJsxMQQPt8tq4CG9Hd10QHDVjsyDzrYJPOiMm8k4ADyI7FvVnBPRu3pUz0TYFX63rL4i9HCs066Xla/bHcElazIPZB6y7ZHMg1tiBTy4FQ8TKDC1AR5EJLvTpCdeQ8HD+7J+5qRi5uGEGbICeEhvRzcdEFy1I/Ogs20CDzrjZjIOAA+NCg8HZP01xxXhYdIMWfGaS7kEu7KQeSDzQObBwTYAPAAPqb3bwh9xGjLzMCjrrz+pCA8TLpPlwEN6O7rJlYTLNmQedLZN4EFn3EzGAjIPjQsPG2/6eBEejr4UeDDpMNgkMxgCD8n4/UjbO/CgM24mcQceGhUeDsrG24INsjzxxl0grWQe0tvRTQYDl22AB51tE3jQGTeTsQB4aGB4uGNKMfPQNE1agIf0dnSTwcBlG+BBZ9sEHnTGzWQsAB4aFR4GZeOdZ4fgYTrwYNJhsElmMAQekvH7kbZ34EFn3EziDjw0Ljysn1MGD6/a3eHgkjV3Wzi40t5kADK1AR50ihDwoDNuJv0SeGhUeDggHcBDeju2SefXZAM86GyrwIPOuJmMDcAD8OD5DwZrmi4tZB7S29FNBgOXbYAHnW0TeNAZN5OxAHgAHoCHlKf8TQYC122AB50iBDzojJvJeAA8WMJD4RHeC6TySRi5J3TW/OmchTLkngaaFX8vqjyjrUpg2kJMOgg2bgx+wIMbcbDtD8CDzriZxBl4GCs8RD0Lo57w0Cxte4tw0HOXDxI2AAE8AA+KMi7Ag04RAh50xg14qIxbJ
CzZbE+dvepvlgV3+U/iLBVwkeTgQaRHFnieLOgtAsXo74AH4AF4iK0NLFnSLplMZvQ+JyKdnRlZtGhtbOc1GeSTtAEeKkUoyXjEee5IMY3oATzbIu+UPDy07c2JtTerTfYV/FUGDwXbgoFUiHwWXBZITwAw/uLF/DFz2YT81ET4iZ9Vjpv9XUm5RKTiN8BDwwzgcQ4YSR2LzINOEQIedMbNpJ8DD2OZtshnHPKCX1zjMFZ4CE+B5KHE86Rw3Kzwh7IKFSBQBgfZcpVmRfatai5ASQ5lgAfggcxDbG2AzEO0SAIP0X4xEWfXbYCHI4AHP4+QXWsQCPVY4SH4fU7Ws0Jfsn6h2nFz33uF7ESZfX46pXRKA3iITThc7+RpKB+ZB50iBDzojJvJmAE8hODhujWhKYjwVEPofcVVfz5TkBXuMtGusPWPU7Y2IZi2CJ2iMksQddzwnRbhzEXuQCXHyJajfDEl8AA8kHmIrQ2QeYgWSeAh2i8m4uy6DfAgIi8ul6+H1hqEdLzybRQQ5KcvFvRGiXxpViE+eCg/bllRQ+XMZkcKWYnADniITThc7+RpKB+ZB50iBDzojJvJmAE8BOsFigsVA3mNfA2Jcvj7YPqieVZorUJ5lsH/Qfb3ofULY848VIGH/JRK86o2WVBxV4hfEOABeCDzEFsbIPMQLZLAQ7RfTMTZdRvgoSjo3nn/V54JE0HU+xHgIcgo+Bs2FRY65tcalK9F8G0K6w9qCA+Sz4gEd2+UVgd4iE04XO/kaSgfmQedIgQ86IybyZgBPIjI4G/kDn/awpsbsWtkqeRW3vIY+j4v1kV4CIFJ9vg+NMS15qF65iEAmgKohIpK5kHRVbdJR067DfCgU4SAB51xMxlPgAdfUYPbI5ul7c8lCqv7PxFZjWKFyDyQeVAEUMCDThECHnTGDXiojFs0LOUWOvrTCTeufauor6rflS3erKgL8AA8AA+xtQHWPFQOtr4AAQ/RfjERZ9dtosW0Qmkk1TtM+rmHeflbHy9fLrsrq6/qk9zizcrbN0srATzEJhyud/I0lI/Mg04RAh50xs1kzAAecpL6Vsetkns6pSeT/0/VZZOlOqzyf8AD8EDmIbY2QOYhWiSBh2i/mIiz6zbAQ17539ogt06OeI6ESjAwKTTwEJtwuN7J01A+Mg86RQh40Bk3kzEDeCgK7e5f+E/KrJbuL9rrfgc8AA9kHmJrA2QeokUSeIj2i4k4u24DPAQI8JZsmD05NnjIPZ8itJ9DcBpnXoGH2ITD9U6ehvKRedApQsCDzriZjBnAQ17Ngy2qY8k85O90mNUsxQ2inKGGfEGAB+CBzENsbYDMQ7RIAg/RfjERZ9dtgIeclu5+4Ov5BZOT5YoVR3i/RXYXygXSE7y6xg3Z8gAPsQmH6508DeUj86BThIAHnXEzGTOAB19J/SmL3GLJyf/8Gxk8QrHPTllkH0RVtptkcNxg86aS7aMD29CGVXv9HwT/L1/Mmd+bovyBV8Gxg3ON+Ao8AA9kHmJrA2QeokUSeIj2i4k4u24DPPjquluWX5YT57lPjqi2hl/kRD3YEjq758KsttJHfQfQUCL8ASSUPjq7567w/3M2wfbXuXUV4e9Fop+gGVV04CE24XC9k6ehfGQedIoQ8KAzbiZjBvDgC2sg3NfKmn1RQmvxWfmVf/b/Zc+hiPosX4YADEY6Yykc5ModgEquHmXnGulAPFUzvZ3apONrswEedLZX4EFn3EzGB+Ah/PCq0qv4EXV3lC8qMw2l2YLsT8sBI/thOQgEJ8lPT+QfrFV+K2kJTPjHLc9yBIepeCXzQOaBaYvY2gDTFtEiCTxE+8VEnF23AR5C8HBV2xFuTR1kMPLrE8KC74XAxBgeKsGjBBZ8ICgcK3+HxyrT1AnwEJtwuN7J01A+Mg86RQh40Bk3kzEDeAjBg/FVe8VlfO6DgpCXfZ+96yK050OkXUTmIcKuAh4kv8ZiVZs0hwGlrAiV/wUegAcyD7G1A
TIP0SIJPET7xUScXbcBHkRk6wKZ7GcJjgge8tMLJYsgA8ku+y4CCoJ1F8X1C0FWIbSGIfu7yh0wgw2p7PaUAB5iEw7XO3kaykfmQacIAQ8642YyZgAPgUh74h3JbZrl2YWAG/KvOYHPg4ApPPhLOe8KTYHc1RN9N0X23CHIKDt39H+BB+CBzENsbYDMQ7RIAg/RfjERZ9dtgIcQPERmDaKl16VPs2BinTUBHmITDtc7eRrKR+ZBpwgBDzrjZjJmAA/a4SFirYQR2QAPwAOZh9jaAJmHaJEEHqL9YiLOrtsAD1rhIb+OwvOk2t4Q0SwBPMQmHK538jSUj8yDThECHnTGzWTMAB60wkM0Eph/CjwAD2QeYmsDZB6iRRJ4iPaLiTi7bgM8AA/ZjaeapkvLq+bo4Zqll8lkZNGitbGJgesdt9HKR+ZBpwgBDzrjZjK+AA/AA/Cg6ArcpFOn0QZ40ClCwIPOuJmMIcAD8AA8AA/OZ2yAB50iBDzojBvwUBm3SFgaYfMl11Lz8ZaHNQ/OC6ZJB24UG+ChcjDTEHvgQWfcTNpWpJhGqNSbb74pGzZ0yrJlj6secyPrCzyIx5qH9HZyk4HAdRvgQWf7BB50xs1kPIgUU+AhwgNp+4jMg2oKNuncabIBHnSKEPCgM24mYwfwwJoH1jyw5sF5kAIedIoQ8KAzbsBDZdwiYalhpy0+KVlw8B8KxrRFZWMx6UDY1MdvwEN9/Bx3ewYedMbNpB1EimlEhp41DxFOUf3RAVk/51PAg0knwSb5ARB4SD4GY+kHwIPOuJnEGnho1GmLQdk4f7o0+VmHbOZhmix+WS8NsUlUyqdegAedIgQ86Iwb8FAZt0hYashpi4Py6D0XybgQPNz3Z+DB+bl/k06dRhvgoXIw0xBn4EFn3EzaVqSYRmgI0xYRTlH90QeSWRyCB++LsnDnIbU1IvNA5iHbeDs7H0tkm3KebREtksBDtF9MxNl1G+BBRJ6cm0vd39WjVjztC35Inlt8sRwVZB68L8rd3QftD+PIL4AH4CHbFIEHt8QKeHArHnECCfAgsm9VcwPCg8grLaXwMG/9+46ggH0xgAfgAXhwsA0AD8BDmqctnvnpF7Lw0PyL3faqpfgX+5eH4eE8ubMDeGDNg4MC5F8tseZBpwgBDzrjZpKhIPPwlmyYnbvjoHnVPsUoYF/0v664SMYXpi3OkhvagQfgAXgYUxtgzUO0SAIP0X4xEWfXbRoeHva2SXNeQBf02guw5l+8vfJimVCAhxPkyofeU1sdpi0cFf24BkAyDzpFCHjQGTeTftvQ8PDiGrnxksm59Q5es7QpvlVxLKr/zuor5YQCPEyUS1e+NZbDOPEb4AF4yDZEFky6JVbAg1vxMIECU5uGhodgf4esgM6VRrrXwh9o31lzg0wpwIMnF7XulsO2KBDK3BS2uvY8iTOLk13QOqtNRptUAh6AB+DBwTYAPAAPqVwwGYaH2RtE73W3reLn7N/ruFM+E4KHC+59QoZtD5WFh2Zp2xv+YY8s8I9bRfDDvxjtPfDgoCiYXp3EZce0hU4RAh50xs2k35J5yC2WvHFto6GDyHsb58m5YXi4s0usd3qIhIccCvTc5YkXw94ZwAPwwN0WStsA8AA8pDrzcN4C6Rkc7do3nd8d7LtHPt+Uf7aF58m5N6yXN22rOgo8SMR3WaAoAMuC0qmicCYolLUoh4fs/z1PwnfHMG2hVFxMrnB8GzIPOkUIeNAZN5N+2fCZh6nN0vJsA5KDiBze0yJfmliEh5OvXC5/sJ23iACEIn/kpi8K6x96F5SshciCRAAJ5cfZ2yZt+btfSuAhDxiFY+ZPVoCHRYvWZrcn5jVdfrCFh3rH3/ZWzXqXL6nz2cJDUuXkvGMbL1pb18m2bduKY37EuyDzkAYfF+o7PCiDtmIZ4Ru1H722Qq48uQgPx16xUJ54w7I25aJf8vMyeCj5LniaaT77kIWCskxE3r4AD9lzlWYcgkN6fgPOZ
DL8pdgH1QYpvzEk2Q5cL19S/QO/pHtc2rFjRzAOR74ODg5Kd3d3asbmF154IbKeDfXh/tVyy1lFeJhw8W3SvsuSpizhIZhyKN6ZEQBDfpFl2XSEH48cPDRn9+MIT1WEY+WF/8N7PIAH8AAewAN4oEYe2N8hP5o6Lr/PhSdNZ14ry7Zaph5Gg4eybELJNIVfpbLvs7XMT0v4cBFMTeSAo1maZ418BwfwUKM2wmHxAB7AA3gAD5R4YKBP5l8wqQAP3kcvlLmbdtndrjkiPOyTtlnhKYaIKYwoeMgXMHynRmHaQvLZiYg7OICHksjyHzyAB/AAHsADNfLAod3Scu0pRXhoOkOuXf6E/NXmdJHwkBf5YDFk9ni5z4rTDnkbr7jmIcg0iJSCRxEeRHJ3cFTeAgo82AQNWzyAB/AAHsADY/bAS7LquilFePA+Il+e87A894HFPpNZeCiumwjWMhRBIFS40JSE50NDSeYhBwzB78P7Q5TAg3+44DghOAEeQn7mLR7AA3gAD+CB2nngDcnc/tnQkzWbZMqs+2TDX/aLtudrAg+1ayUcGQ/gATyAB/BAyAND8ofFM0MPx/LkI9NvkZbOPvnPDz4M2bn/FnhwP0aUEA/gATyAB1LhgcPyfPtNcs740LTD6TPltnt+KZ0v7BVN+AA8pKJBUgk8gAfwAB7Q4IE/d8+Xy0MbRXmTvihf/c58WfzrzfKuhgrky5jfJOpxyWT4S6sPTDYb6uvbklgb6O/vr9plenv7EitfUu1i+/btVf2ydau/yRt9V6MPqm0S5Qe/u/up1MR3165dVdtzIxgceHql3HTuhNCiyVPlv029Tm6++efymiIHsD11yrflNtmeenh4WDo7H0tke3J/e+qurq5Ru8zQ0JB0dmYSKV9SWwP721Nv3rx5VL/4X27a1NlQfkkqHnGft7Bd8ygRHhgYkA0b0hFfv769vfkHJ4xS50b46vDeR2Xh5afI0YWHVXnSdOznZfq3H9AJDyYPdMFG38N6bOBh8eJHpJ4xXrdur9jAw5IlHXUtXz19ET5Xe/sesYWH8O9573Y/NX0wVgAPS5duVN3u16zZLcBDCIsG++SXsz8nU8LrHrxj5aRL/01eD5lVe5t7WmaztO2tZlmb7wuZBwYctwecscYHeNAXV+BBX8xs+ifwUBsxU3PUw6/Ko/c1yyUnh6cuPJk0418t4MHf8Cm3fXRxE6j6egB44JHcEkxbkHlwQ7SABzfiYAMENrbAQ31Fzr2zHZT/eHieXP+5M6UpNHVxtA08+Js2+Rs2Ba8JVBJ4AB6AB8faAPAAPPhawLRFAopYl1Melv/q+bXc+b2vynEheDjmimXGmQd/yiKbcYjcqjp4Kmab9Kxqzi3M9J9NEewuWb5bZNmOlYVMRv7z8p0r/XP7/4AHx4TD5grGxJZpC31CBDzoi5lJXwxsyDzURaGdPskH+3rkV/N/KH8zJnjITVnk1jrkt5gue3BV7qmY4YdkhbaYLrHdJ213tcm+wFslwBB17NzzMYCHlIODP1gBD/qECHjQF7MADExegYdAqRr49cAOeWr5HJk+rrhZlGnmofy5EzlQyD/sKu/SqM9ymYdqCyxLH5BVyFYEocpPk/j/JfOQcoAAHvQJEfCgL2Ym0BDYAA+BEjXw6/Dz8tTDC+XSo23hISIbUJItyPm0HDCynwbTFhVuD562WSxLYeoi/0juYOqiMF0CPKR7kPIHK+BBX4yBB30xC8DA5BV4qFCvxvvg3S3y7/f9WC46qijYRpmHsvUJhSdi+tMfoSdeGsNDBXiUZR5EJHtLqD/VkbUtZi7IPJB5YMGkY20AeAAefDVlwWRameKwvPtCu/zkhzPls0128BAJBb6bslmForBH2kVkHirtKuEhBw0LpM1ffBlaLwE8OCYcJlcuNjZkHvQJEfCgL2Y2fZLMQ1qhwLReA/Lcowvlxs//rZwUWjA54VsPVLnbIje9UJxSCJ+v9LtKKAgAY/S1EdnfeWULL
SU/VeJ5Ekxf+GcGHoAHMg+OtQHgAXjwB2cyD2FxTNH7w3vksaWz5asfmyjjQvAwsRo8lGUXyj2SnV7wcnBgCg8SAgN/CqR5VY+0zSqHh+Ktn4W7MoCHdA9S/tUQmQd9MQYe9MWMzMPIMWN76jKZH+iWX936HTkjtN4hu3bhK7+oknkoO07d/hsxlQE8jNzgbQYDl22BB30xBh70xcxmDGDaom6q5+aJdq+X+d86Vz4ayjr48DDJVXiIWCvhO5ZpC8dS1jaDkIkt8KBPiIAHfTEz6YuBDfDgpqbXq1TDTz8o13/6xNDW1EfLUROPlTO++e9uZR4Kd3YUF2KGfQQ8AA+seXCsDQAPwIM/SLPmISxV6Xk/2LdQvnFiU27baD/7cMwp8ukLvyn/MH+T7FdUTeDBMeEIrk7ieiXzoE+IgAd9MbPpr2QeFClkDYr6avst8qnQzpJNU74s35/zU1n+0OPydg3OV6tDAg/AA5kHx9oA8AA8+AM+mYdayV6Sx/1Anrn/ypJbNI/98nXyLz/bLL/v3yGDSRbN8tzAg2PCYXMFY2JL5kGfEAEP+mJm0hcDGzIPliqVKvPXpHP2lXJ8YbHkODnz6rmyrusNee/9AVU1BR6ABzIPjrUB4AF48FWEzIMqLTUs7Euy+rrz5egCPBwnF9y+XHa9ctjw9+6YAQ+OCUdwdRLXK5kHfUIEPOiLmU1/JfPgjgDWvSSHdkvrNacWF0uOO1Ouuy8jbw3XvSRHfELgAXgg8+BYGwAegAd/ZCfzcMT65t4B3uuWedOPKcLDpK/JnWt2yiH3Slq1RMCDY8JhcwVjYkvmQZ8QAQ/6YmbSFwMbMg9VdSm9Bvs75EdTQ7dpnnyt3N/zlsr6Ag/AA5kHx9oA8AA8+GpC5kGlpo5e6P2r5Kazik/S9E6fLatf0Jh3YIdJCa4G0vpK5kGfEAEP+mJmM36QeRhdX1P97WvL5fITi/Bw1Pnz5HeaNncIBYfMg2NXnTaDkGtIN30AABCSSURBVIkt8KBPiIAHfTEz6YuBDfAQUqAGe3to92I5f0IRHibNbJFnFS6W9MMGPAAPTFs41gaAB+DBH5yZtkgfWRx4YqF8tqkIDyfNelj69d2lmQ0M8OCYcARXJ3G9knnQJ0TAg76Y2fRXMg/pgwLTGr23ca6cW9jjwZO/va1TXjH9sWN2wAPwQObBsTYAPAAPvk6QeXBMLWMozjsdd8g5IXi4YOETqrakDrsAeHBMOGyuYExsyTzoEyLgQV/MTPpiYEPmISxBjfX+nTXXyWkheLio5Qn5QKkLgAfggcyDY20AeAAefD0h86BUVUcp9turw8+1mCSXPvi8yg2i/CoCD44JR3B1EtcrmQd9QgQ86IuZTX8l8zCKuqb8q7dXXSYTCpmHj8tVa5Tepwk8pHuQ8gc04EFfjIEHfTEDHkaO2Zo1u6W1dZ309vamHA2qV++vKy6R8QV4mCq3tL9X/UeOWpB5IPPAtIVjbQB4GFmIbETaVVsyD46qYR2K9fqyi2RcAR7Okzs7gIfU79To6kBUrVxkHvQJEfCgL2bV+mH4e+ChDirt6Cn2Lr5YjgrBw9z17zta0urFIvPg2FVneJCJ4z3woE+IgAd9MbPpq8BDdWFKp8WH0l8GD/N7tN5rwYLJ1GdMgAd9QgQ86IsZ8DByzFjzEKDQB5K5PzRt0fRFuecFpdtLsmBy5AZvMxi4bAs86Isx8KAvZjZjAJmHQEwb7fWgPLowDA/T5N7den3AtAXTFiyYdKwNAA/Agy8p7POgV1ijSz4oG+dNk6ZgzUPTNLl/b7Slhk+BB8eEw+YKxsSWzIM+IQIe9MXMpC8GNmQeNEhjLcp4QDrmTBUvBA8tr9XiPPU5JvAAPJB5cKwNAA/Agz/8k3mojwjW7yw+PHwyBA/TBXhwbPANCJ9XNonS2AaAB+ABeKifpNfvTD48nF0KD6/W7+xxn4nMQ8rBh2kLfUIEPOiLmQ2kMm0Rt4xpOR7wkPrbG20GA
tdtgQd9QgQ86IuZzTgAPGgR+7jLCTwAD4qyFcCDPiECHvTFDHgYOWbs8xBACPAAPAAPsbSBdev2ypIl7dLV1RX0rsjXoaEh6ezMyJIlHbGc12agT8IWeBhZiJKIR9znJPMQ2c0b4EPgoSEG8LgHjKSOR+ZBnxABD/piZtO/gYcG4ITIKgIPwAOZh1jaAJmHaJEEHqL9YiPQLtsCD5HK2gAfAg+xCIfLnTtNZSPzoE+IgAd9MbMZM4CHBuCEyCoCD8ADmYdY2gCZh2iRBB6i/WIj0C7bAg+RytoAHwIPsQiHy507TWUj86BPiIAHfTGzGTOAhwbghMgqAg/AA5mHWNoAmYdokQQeov1iI9Au2wIPkcraAB8CD7EIh8udO01lI/OgT4iAB30xsxkzgIcG4ITIKgIPwAOZh1jaAJmHaJEEHqL9YiPQLtsCD5HK2gAfAg+xCIfLnTtNZSPzoE+IgAd9MbMZM4CHBuCEyCoCD8ADmYdY2gCZh2iRBB6i/WIj0C7bAg+RytoAHwIPsQiHy507TWUj86BPiIAHfTGzGTOAhwbghMgqAg/AA5mHWNoAmYdokQQeov1iI9Au2wIPkcraAB8CD7EIh8udO01lI/OgT4iAB30xsxkzgIcG4ITIKqYUHhYtWiv8pc8HNvCQRPxtnqqZRPmSOmdLS7ts3rw5cggKf7hpUyf9VuHY1dq6TrZt2xYOZcX7gYEB2bAhHfH169vb21tRx8b6IGXw4DfgTCbDX4p9UG2Q8jvwli1bEmsD/f39VceQvr6+xMqXVP/Yvn17Vb/Qf/WOXTt27Kga3+7u7tS0+507d1atb7oNUgYP6Q4WtcMDeAAP4AE84IIHgAcXokAZ8AAewAN4AA8o8gDwoChYFBUP4AE8gAfwgAseAB5ciAJlwAN4AA/gATygyAPAg6JgUVQ8gAfwAB7AAy54AHhwIQqUAQ/gATyAB/CAIg8AD4qCRVHxAB7AA3gAD7jgAeDBhShQBjyAB/AAHsADijyQMnjIbTLzuGQy/KXVByabRPX1+ZtEJdMGTDaJ6u31N4lKpnxJnddkk6itW/1N3hrLL2mpr9kmUU+lJr67du1SJPS1KGrK4MHfPS+pLXg5b+23w2Z76tr7uBbtmO2pdcbNtC2wPXUtxNn1Y6YUHmwe7IKtngf32MDD4sWP1PWhZzxVM7od8WCsaL+kZdzhwViui3ytygc81FVg0jJgJFUP4EGfEAEP+mJm07+Bh1qJs+vHBR6Ahw49gxvwoCdWgQABD/piFsTO5BV4cF3ka1U+4AF4AB5iaQNMW0SLJPAQ7RcTYdZgAzzUSpxdPy7wEItwaOjkaSgjmQd9QgQ86IuZzVgBPLgu8rUqH/AAPJB5iKUNkHmIFkngIdovNgLtsi3wUCtxdv24wEMswuFy505T2cg86BMi4EFfzGzGDODBdZGvVfmAB+CBzEMsbYDMQ7RIAg/RfrERaJdtgYdaibPrxwUeYhEOlzt3mspG5kGfEAEP+mJmM2YAD66LfK3KBzwAD2QeYmkDZB6iRRJ4iPaLjUC7bAs81EqcXT8u8BCLcLjcudNUNjIP+oQIeNAXM5sxA3hwXeRrVT7gAXgg8xBLGyDzEC2SwEO0X2wE2mVb4KFW4uz6cQ/I+jlni+d5ub+m6dLyqutlHrl8XvBgLJc7G2Ub+2BK5mHsvkuq3QEP+mJm01aAh5EFKd3fDAIPNh0F22QHQuAhWf+Ppf0DD/piZhNn4CHdiDBy7QZl451nlWYeXhvZ2vVvyDwomoKwGaACW+BBnxABD/piFvQ3k1fgwXVZrFX5DsrGO6aE4GGatAAP6e7sJgOCqzbAg762CTzoi5lN/wceaiXOrh93UNbPnlyEh3EXSCvwkO7ObjMwuGYLPOhrm8CDvpjZ9HvgwXWRr1X5BmX9TR8vwsP4S2QZ8JDuzm4zMLhmCzzoa5vAg76Y2fR74KFW4uz6ccvgYcJlshx4SHdntxkYXLMFHvS1TeBBX8xs+
j3w4LrI16p8g7L+muOLmYcJM2QF8JDuzm4zMLhmCzzoa5vAg76Y2fR74KFW4uz6cQdl/cxJRXiYOENWvO56mUcuH3dbcLeFDA8PS2fnY7J48SOxbP5kOpCySVS0SAIP0X4xbVeu2wEPIwtSur95Tx6aeUwRHo6/Uh4EHtLd2V0fjEYrH5kHfW0TeNAXs9H6YPl3wEO6EWHk2r0sD14RgodTr5VV+0e2dv0bMg9kHsg8ONYGgAfgwReOgYEB2bChU5Yu3VjXjGA57Bzp/9es2S2treukt7fXdT2sbfkOb5OWKyYWMw9TfySrgYd0d/Yj7TxJ/p7Mg762CTzoi5lNHyfzUFuNdvboh/rk3hkTQvBwh6x5w9nSVi0YmQfHrjptBiETW+BBnxABD/piZtIXAxvgoaoupdPgwx6Ze8H4EDzMkc539FYVeAAemLZwrA0AD8CDLylMW+gV1siSH/yNzD47/0RN/8maU+fKM0ORlio+BB4cE47g6iSuVzIP+oQIeNAXM5v+SuZBhTbGX8jX18h1JxfhoemchfJC/Gep2xGBB+CBzINjbQB4AB58BSDzUDcdrMuJhp9bLlcfV4SHY6bfJ3vqcubanAR4cEw4bK5gTGzJPOgTIuBBX8xM+mJgQ+ahNmLm+lHf+N3CEnj42NWtwEPQKXh1b9ADHtyLSbV+Ajzoi1m1mIa/Bx5cl/lalG9YXnzkx3LhxGLm4dQbHpS9tThVnY5J5oHMA9MWjrUB4AF48Md/pi3qpIJ1Oc2b0vOzH8hpTUV4mHzTQ8BDmKp579bAR+bBrXiY9A/gQV/MTOIa2JB5qItau3WS4Z2yae50meTfZZH/m3J7l7zvVimtSkPmwbGrzmCAiesVeNAnRMCDvpjZ9FfgwUqjUmH8/hu/lX/77iekqQAP4+Xcuc/IIcW1Ax6AB6YtHGsDwAPw4GsK0xaKlTVU9KGhg7Kj75dyx7TQEzW9Y2Xmvc+HrPS9BR4cEw6bKxgTWzIP+oQIeNAXM5O+GNiQedAnlGMv8QF5afdu6frl3dL8ieKUheedJFev2Dn2wzrwS+ABeCDz4FgbAB6AB18byDw4oJBHWoQ3/iSbNnTIyvnfk8+ND8PDGfKDjpeO9OiJ/h54cEw4gquTuF7JPOgTIuBBX8xs+iuZh0Q1r64nP/RshyxfulB+fP05clJhvYMn3qSvyJxezcslRYAH4IHMg2NtAHgAHnyFI/NQV52vyck+yNwt827/rvyPLx1buMsie7fFGVfJ/b8frsk563VQ4MEx4bC5gjGxJfOgT4iAB30xM+mLgQ2Zh3rJW/LnOdDxXbl+5t/LySeEpyw8Oeq/3yKrnz2cfAGPoATAA/BA5sGxNgA8AA/+mE7m4QiUzZGfDnbcINM+dVxp1sHzZOIFc6RzvyOFHGMxgAfHhCO4OonrlcyDPiECHvTFzKa/knkYo1op/Nlgxy0y5aOlWQd/2uLkyxdLv+5ZC9Y82HR6jbbAgz4hAh70xcxmbAAeFFLAGIvsw8Op4YWS2fcT5O9mrxPliQfgwabTa7QFHvQJEfCgL2Y2YwPwMEYlVvgzHx5OqYCHk+QbCzPyjsL6hIvMtAXTFqx5cKwNAA/Agz9Is+YhLFU630fCwzGflpt/tkUGdVapUGrgwTHhsLmCMbEl86BPiIAHfTEz6YuBDZmHgv6k/k0UPBw7eabcvell0X2vBfs8SNCh0/oKPOgTIuBBX8xsxg/gIfXMUKhgJTyMk785/0ey6j8OFGy0viHzQOaBaQvH2gDwADz4gsK0hVZZLZa7Eh6Ol0u+v1h6Xy7aaH0HPDgmHDZXMCa2ZB70CRHwoC9mJn0xsCHzoFUu7ctdDg9Nx/6dfP/ujbJdf+KBuy2CDp3WV+BBnxABD/piZjN+AA/2Iqz1F+XwcNzfXyZzfvVHeUVrhULlJvNA5oFpC8faAPAAPPhjNNMWIaVS+rYUHo6Ss6++VVY++b4MKa1PuNjAg2PCYXMFY2JL5kGfEAEP+
mJm0hcDGzIPYQlK9/sSeDjlU3L5nH+Vzpe132eRixnwADyQeXCsDQAPwIM/PJN50A8WPjxMHn+MeMefKKd87R9l3s/75MUP9dfLrwHw4JhwBFcncb2SedAnRMCDvpjZ9FcyD+kQT5Na+PAw5fSpcsrXLpWZ/7RUOp/cq35zqKDewAPwQObBsTYAPAAP/gBN5iGQKb2vBzqul4suvVKu+N/z5aerHpc9+7TvK1mMBfDgmHDYXMGY2JJ50CdEwIO+mJn0xcCGzENRgNL+7sD6G+S2O+bLT5ZvlGd2/kmGDqWnxsAD8EDmwbE2ADwAD77EkHnQL7QfblkpD65cLR2P/l7eHkjB5g6hkAAPjglHcHUS1yuZB31CBDzoi5lNfyXzEFKgtL/9yx/l2f5++fOeN1JXU+ABeCDz4FgbAB6AB19pyDykQG8PDcngwYMpqEhlFQrwsGjRWuEvfT6wyTwkEf8lS9qlq6ursmWGPhkaGpLOzkxDtc+WlnbZvHlzyAvRbzdt6mwovyTRRmtxztbWdbJt27booOY/DeChFuev9zH9+vb29o5aX77U5QHPb8CZTIa/FPug2iDlN9ktW7Yk1gb6+/ur9pq+vr7EypdU/9i+fXtVv9B/9Y5dO3bsqBrf7u7u1LT7nTt3Vq0vBno88P8BcYvUTnQsadwAAAAASUVORK5CYII="
    }
   },
   "cell_type": "markdown",
   "id": "9dc0994e54811dd0",
   "metadata": {},
   "source": [
    "Dask arrays 是由多个 NumPy arrays（或类 NumPy 的数组）组成的，这些 arrays 的组合方式（即 chunk 的划分方式）显著影响着性能：对于不同的算法，不同的划分方式可能使该算法执行得更快或者更慢。\n",
    "\n",
    "![image.png](attachment:e7183a3b-57d2-4a0c-8c69-d575e063fcf2.png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "d7a935de43f58aae",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-29T15:18:52.815374Z",
     "start_time": "2024-05-29T15:18:52.728712Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100 499999.5\n",
      "当前耗时：1.7932193279266357\n",
      "执行前3的如下：\n",
      "耗时：1.7932193279266357 参数配置：(100,)\n",
      "1000 499999.5\n",
      "当前耗时：0.16368532180786133\n",
      "执行前3的如下：\n",
      "耗时：0.16368532180786133 参数配置：(1000,)\n",
      "耗时：1.7932193279266357 参数配置：(100,)\n",
      "10000 499999.5\n",
      "当前耗时：0.026955366134643555\n",
      "执行前3的如下：\n",
      "耗时：0.026955366134643555 参数配置：(10000,)\n",
      "耗时：0.16368532180786133 参数配置：(1000,)\n",
      "耗时：1.7932193279266357 参数配置：(100,)\n",
      "(500000, 500000) 499999.5\n",
      "当前耗时：0.004243612289428711\n",
      "执行前3的如下：\n",
      "耗时：0.004243612289428711 参数配置：((500000, 500000),)\n",
      "耗时：0.026955366134643555 参数配置：(10000,)\n",
      "耗时：0.16368532180786133 参数配置：(1000,)\n",
      "(100000, 400000, 500000) 499999.5\n",
      "当前耗时：0.004991054534912109\n",
      "执行前3的如下：\n",
      "耗时：0.004243612289428711 参数配置：((500000, 500000),)\n",
      "耗时：0.004991054534912109 参数配置：((100000, 400000, 500000),)\n",
      "耗时：0.026955366134643555 参数配置：(10000,)\n",
      "auto 499999.5\n",
      "当前耗时：0.004010677337646484\n",
      "执行前3的如下：\n",
      "耗时：0.004010677337646484 参数配置：('auto',)\n",
      "耗时：0.004243612289428711 参数配置：((500000, 500000),)\n",
      "耗时：0.004991054534912109 参数配置：((100000, 400000, 500000),)\n"
     ]
    }
   ],
   "source": [
    "import dask as dd\n",
    "from dask import array as da\n",
    "import numpy as np\n",
    "from utils import clocked\n",
    "\n",
    "# Benchmark input: the integers 0..999_999 as a 1-D NumPy array.\n",
    "# np.arange allocates the array directly instead of iterating a Python range.\n",
    "arr = np.arange(1_000_000)\n",
    "\n",
    "\n",
    "@clocked()\n",
    "def compute_mean(chunks=100):\n",
    "    \"\"\"Print the mean of the module-level ``arr`` computed through Dask.\n",
    "\n",
    "    Args:\n",
    "        chunks: Chunk specification forwarded unchanged to ``da.from_array``.\n",
    "\n",
    "    Returns:\n",
    "        None. The chunk specification and the computed mean are printed.\n",
    "\n",
    "    Note:\n",
    "        The function is wrapped by ``clocked()`` from the local ``utils``\n",
    "        module, which is not visible here (presumably a timing decorator --\n",
    "        confirm against ``utils``). Wrapping the array, computing the mean and\n",
    "        the ``print`` call all happen inside the decorated call.\n",
    "    \"\"\"\n",
    "    x = da.from_array(arr, chunks=chunks)\n",
    "    print(chunks, x.mean().compute())\n",
    "\n",
    "\n",
    "# Chunk layouts to compare: three uniform chunk sizes, two explicit\n",
    "# per-chunk size tuples for the single axis, and the 'auto' setting.\n",
    "chunks_list = [100, 1000, 10000, (500000, 500000), (100000, 400000, 500000), 'auto']\n",
    "for chunks in chunks_list:\n",
    "    compute_mean(chunks)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6258da22-919c-49c8-b8d4-6b33c20b7d35",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-29T14:14:20.428491Z",
     "start_time": "2024-05-29T14:14:20.426922Z"
    }
   },
   "source": [
    "## 指定每个chunk的形状"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "cd77119a1486c1c7",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-29T15:23:47.010389Z",
     "start_time": "2024-05-29T15:23:47.005194Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(100,)\n",
      "((25, 25, 25, 25),)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <tr>\n",
       "        <td>\n",
       "            <table style=\"border-collapse: collapse;\">\n",
       "                <thead>\n",
       "                    <tr>\n",
       "                        <td> </td>\n",
       "                        <th> Array </th>\n",
       "                        <th> Chunk </th>\n",
       "                    </tr>\n",
       "                </thead>\n",
       "                <tbody>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Bytes </th>\n",
       "                        <td> 400 B </td>\n",
       "                        <td> 100 B </td>\n",
       "                    </tr>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Shape </th>\n",
       "                        <td> (100,) </td>\n",
       "                        <td> (25,) </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Dask graph </th>\n",
       "                        <td colspan=\"2\"> 4 chunks in 1 graph layer </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Data type </th>\n",
       "                        <td colspan=\"2\"> int32 numpy.ndarray </td>\n",
       "                    </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "        </td>\n",
       "        <td>\n",
       "        <svg width=\"170\" height=\"75\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
       "\n",
       "  <!-- Horizontal lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"0\" y1=\"25\" x2=\"120\" y2=\"25\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Vertical lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"25\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"30\" y1=\"0\" x2=\"30\" y2=\"25\" />\n",
       "  <line x1=\"60\" y1=\"0\" x2=\"60\" y2=\"25\" />\n",
       "  <line x1=\"90\" y1=\"0\" x2=\"90\" y2=\"25\" />\n",
       "  <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"25\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Colored Rectangle -->\n",
       "  <polygon points=\"0.0,0.0 120.0,0.0 120.0,25.412616514582485 0.0,25.412616514582485\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
       "\n",
       "  <!-- Text -->\n",
       "  <text x=\"60.000000\" y=\"45.412617\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >100</text>\n",
       "  <text x=\"140.000000\" y=\"12.706308\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,140.000000,12.706308)\">1</text>\n",
       "</svg>\n",
       "        </td>\n",
       "    </tr>\n",
       "</table>"
      ],
      "text/plain": [
       "dask.array<array, shape=(100,), dtype=int32, chunksize=(25,), chunktype=numpy.ndarray>"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Source data: a 1-D array holding the 100 integers 0..99.\n",
    "# np.arange builds it directly instead of iterating a Python range.\n",
    "arr2 = np.arange(100)\n",
    "\n",
    "# One-dimensional case\n",
    "# 1. Uniform chunk size: every chunk holds 25 elements.\n",
    "darr = da.from_array(arr2, chunks=(25,))\n",
    "print(darr.shape)\n",
    "print(darr.chunks)\n",
    "darr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "c3a1c329-9c89-4cfc-9cd3-7818a558b68d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <tr>\n",
       "        <td>\n",
       "            <table style=\"border-collapse: collapse;\">\n",
       "                <thead>\n",
       "                    <tr>\n",
       "                        <td> </td>\n",
       "                        <th> Array </th>\n",
       "                        <th> Chunk </th>\n",
       "                    </tr>\n",
       "                </thead>\n",
       "                <tbody>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Bytes </th>\n",
       "                        <td> 100 B </td>\n",
       "                        <td> 100 B </td>\n",
       "                    </tr>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Shape </th>\n",
       "                        <td> (25,) </td>\n",
       "                        <td> (25,) </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Dask graph </th>\n",
       "                        <td colspan=\"2\"> 1 chunks in 2 graph layers </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Data type </th>\n",
       "                        <td colspan=\"2\"> int32 numpy.ndarray </td>\n",
       "                    </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "        </td>\n",
       "        <td>\n",
       "        <svg width=\"170\" height=\"83\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
       "\n",
       "  <!-- Horizontal lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"0\" y1=\"33\" x2=\"120\" y2=\"33\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Vertical lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"33\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"33\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Colored Rectangle -->\n",
       "  <polygon points=\"0.0,0.0 120.0,0.0 120.0,33.13387930424821 0.0,33.13387930424821\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
       "\n",
       "  <!-- Text -->\n",
       "  <text x=\"60.000000\" y=\"53.133879\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >25</text>\n",
       "  <text x=\"140.000000\" y=\"16.566940\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,140.000000,16.566940)\">1</text>\n",
       "</svg>\n",
       "        </td>\n",
       "    </tr>\n",
       "</table>"
      ],
      "text/plain": [
       "dask.array<blocks, shape=(25,), dtype=int32, chunksize=(25,), chunktype=numpy.ndarray>"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Select a single chunk by its block index (here the first block, index 0).\n",
    "chunk = darr.blocks[0]\n",
    "chunk\n",
    "# chunk.compute()  # uncomment to evaluate this block instead of showing the lazy repr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "09e2152b-058f-4d36-9e30-78981860c93e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(100,)\n",
      "((30, 30, 40),)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <tr>\n",
       "        <td>\n",
       "            <table style=\"border-collapse: collapse;\">\n",
       "                <thead>\n",
       "                    <tr>\n",
       "                        <td> </td>\n",
       "                        <th> Array </th>\n",
       "                        <th> Chunk </th>\n",
       "                    </tr>\n",
       "                </thead>\n",
       "                <tbody>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Bytes </th>\n",
       "                        <td> 400 B </td>\n",
       "                        <td> 160 B </td>\n",
       "                    </tr>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Shape </th>\n",
       "                        <td> (100,) </td>\n",
       "                        <td> (40,) </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Dask graph </th>\n",
       "                        <td colspan=\"2\"> 3 chunks in 1 graph layer </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Data type </th>\n",
       "                        <td colspan=\"2\"> int32 numpy.ndarray </td>\n",
       "                    </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "        </td>\n",
       "        <td>\n",
       "        <svg width=\"170\" height=\"75\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
       "\n",
       "  <!-- Horizontal lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"0\" y1=\"25\" x2=\"120\" y2=\"25\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Vertical lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"25\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"36\" y1=\"0\" x2=\"36\" y2=\"25\" />\n",
       "  <line x1=\"72\" y1=\"0\" x2=\"72\" y2=\"25\" />\n",
       "  <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"25\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Colored Rectangle -->\n",
       "  <polygon points=\"0.0,0.0 120.0,0.0 120.0,25.412616514582485 0.0,25.412616514582485\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
       "\n",
       "  <!-- Text -->\n",
       "  <text x=\"60.000000\" y=\"45.412617\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >100</text>\n",
       "  <text x=\"140.000000\" y=\"12.706308\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,140.000000,12.706308)\">1</text>\n",
       "</svg>\n",
       "        </td>\n",
       "    </tr>\n",
       "</table>"
      ],
      "text/plain": [
       "dask.array<array, shape=(100,), dtype=int32, chunksize=(40,), chunktype=numpy.ndarray>"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 2. Explicit per-chunk sizes: 30 elements in the first chunk, 30 in the\n",
    "#    second and 40 in the third.\n",
    "#    For this 1-D array, .chunks reports the flat tuple as ((30, 30, 40),).\n",
    "darr = da.from_array(arr2, chunks=(30,30,40))\n",
    "print(darr.shape)\n",
    "print(darr.chunks)\n",
    "darr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "41a0ef84a35fd6c6",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-29T15:58:00.032353Z",
     "start_time": "2024-05-29T15:58:00.026023Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1 2 3 4 5 6]\n",
      " [7 8 9 0 1 2]\n",
      " [3 4 5 6 7 8]\n",
      " [9 0 1 2 3 4]\n",
      " [5 6 7 8 9 0]\n",
      " [1 2 3 4 5 6]]\n",
      "(6, 6)\n",
      "((3, 3), (3, 3))\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <tr>\n",
       "        <td>\n",
       "            <table style=\"border-collapse: collapse;\">\n",
       "                <thead>\n",
       "                    <tr>\n",
       "                        <td> </td>\n",
       "                        <th> Array </th>\n",
       "                        <th> Chunk </th>\n",
       "                    </tr>\n",
       "                </thead>\n",
       "                <tbody>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Bytes </th>\n",
       "                        <td> 144 B </td>\n",
       "                        <td> 36 B </td>\n",
       "                    </tr>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Shape </th>\n",
       "                        <td> (6, 6) </td>\n",
       "                        <td> (3, 3) </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Dask graph </th>\n",
       "                        <td colspan=\"2\"> 4 chunks in 1 graph layer </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Data type </th>\n",
       "                        <td colspan=\"2\"> int32 numpy.ndarray </td>\n",
       "                    </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "        </td>\n",
       "        <td>\n",
       "        <svg width=\"170\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
       "\n",
       "  <!-- Horizontal lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"0\" y1=\"60\" x2=\"120\" y2=\"60\" />\n",
       "  <line x1=\"0\" y1=\"120\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Vertical lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"60\" y1=\"0\" x2=\"60\" y2=\"120\" />\n",
       "  <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Colored Rectangle -->\n",
       "  <polygon points=\"0.0,0.0 120.0,0.0 120.0,120.0 0.0,120.0\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
       "\n",
       "  <!-- Text -->\n",
       "  <text x=\"60.000000\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >6</text>\n",
       "  <text x=\"140.000000\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,140.000000,60.000000)\">6</text>\n",
       "</svg>\n",
       "        </td>\n",
       "    </tr>\n",
       "</table>"
      ],
      "text/plain": [
       "dask.array<array, shape=(6, 6), dtype=int32, chunksize=(3, 3), chunktype=numpy.ndarray>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 二维\n",
    "arr3 = np.array([[1, 2, 3, 4, 5, 6],\n",
    "        [7, 8, 9, 0, 1, 2],\n",
    "        [3, 4, 5, 6, 7, 8],\n",
    "        [9, 0, 1, 2, 3, 4],\n",
    "        [5, 6, 7, 8, 9, 0],\n",
    "        [1, 2, 3, 4, 5, 6]])\n",
    "print(arr3)\n",
    "\n",
    "# 1.每个维度（第一维度和第二维度）都是3 第一维度：6/3=2 第二维度：6/3=2  会被分成：2x2=4块 \n",
    "darr = da.from_array(arr3, chunks=3)\n",
    "print(darr.shape)\n",
    "print(darr.chunks)\n",
    "darr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a2f33272-0477-415f-9177-10b576d95113",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "((3, 3), (2, 2, 2))\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <tr>\n",
       "        <td>\n",
       "            <table style=\"border-collapse: collapse;\">\n",
       "                <thead>\n",
       "                    <tr>\n",
       "                        <td> </td>\n",
       "                        <th> Array </th>\n",
       "                        <th> Chunk </th>\n",
       "                    </tr>\n",
       "                </thead>\n",
       "                <tbody>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Bytes </th>\n",
       "                        <td> 144 B </td>\n",
       "                        <td> 24 B </td>\n",
       "                    </tr>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Shape </th>\n",
       "                        <td> (6, 6) </td>\n",
       "                        <td> (3, 2) </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Dask graph </th>\n",
       "                        <td colspan=\"2\"> 6 chunks in 1 graph layer </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Data type </th>\n",
       "                        <td colspan=\"2\"> int32 numpy.ndarray </td>\n",
       "                    </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "        </td>\n",
       "        <td>\n",
       "        <svg width=\"170\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
       "\n",
       "  <!-- Horizontal lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"0\" y1=\"60\" x2=\"120\" y2=\"60\" />\n",
       "  <line x1=\"0\" y1=\"120\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Vertical lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"40\" y1=\"0\" x2=\"40\" y2=\"120\" />\n",
       "  <line x1=\"80\" y1=\"0\" x2=\"80\" y2=\"120\" />\n",
       "  <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Colored Rectangle -->\n",
       "  <polygon points=\"0.0,0.0 120.0,0.0 120.0,120.0 0.0,120.0\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
       "\n",
       "  <!-- Text -->\n",
       "  <text x=\"60.000000\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >6</text>\n",
       "  <text x=\"140.000000\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,140.000000,60.000000)\">6</text>\n",
       "</svg>\n",
       "        </td>\n",
       "    </tr>\n",
       "</table>"
      ],
      "text/plain": [
       "dask.array<array, shape=(6, 6), dtype=int32, chunksize=(3, 2), chunktype=numpy.ndarray>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 2.每块第一个维度为3，第二个维度为2   (6/3)x(6/2)=6块\n",
    "darr = da.from_array(arr3, chunks=(3,2))\n",
    "print(darr.chunks)\n",
    "darr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "a3a8d4ad-2541-4942-bc12-3b2e3af4ede8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "((1, 1, 1, 1, 1, 1), (6,))\n",
      "((2, 4), (3, 3))\n",
      "((2, 2, 1, 1), (3, 2, 1))\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <tr>\n",
       "        <td>\n",
       "            <table style=\"border-collapse: collapse;\">\n",
       "                <thead>\n",
       "                    <tr>\n",
       "                        <td> </td>\n",
       "                        <th> Array </th>\n",
       "                        <th> Chunk </th>\n",
       "                    </tr>\n",
       "                </thead>\n",
       "                <tbody>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Bytes </th>\n",
       "                        <td> 144 B </td>\n",
       "                        <td> 24 B </td>\n",
       "                    </tr>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Shape </th>\n",
       "                        <td> (6, 6) </td>\n",
       "                        <td> (2, 3) </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Dask graph </th>\n",
       "                        <td colspan=\"2\"> 12 chunks in 1 graph layer </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Data type </th>\n",
       "                        <td colspan=\"2\"> int32 numpy.ndarray </td>\n",
       "                    </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "        </td>\n",
       "        <td>\n",
       "        <svg width=\"170\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
       "\n",
       "  <!-- Horizontal lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"0\" y1=\"40\" x2=\"120\" y2=\"40\" />\n",
       "  <line x1=\"0\" y1=\"80\" x2=\"120\" y2=\"80\" />\n",
       "  <line x1=\"0\" y1=\"100\" x2=\"120\" y2=\"100\" />\n",
       "  <line x1=\"0\" y1=\"120\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Vertical lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"60\" y1=\"0\" x2=\"60\" y2=\"120\" />\n",
       "  <line x1=\"100\" y1=\"0\" x2=\"100\" y2=\"120\" />\n",
       "  <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Colored Rectangle -->\n",
       "  <polygon points=\"0.0,0.0 120.0,0.0 120.0,120.0 0.0,120.0\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
       "\n",
       "  <!-- Text -->\n",
       "  <text x=\"60.000000\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >6</text>\n",
       "  <text x=\"140.000000\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,140.000000,60.000000)\">6</text>\n",
       "</svg>\n",
       "        </td>\n",
       "    </tr>\n",
       "</table>"
      ],
      "text/plain": [
       "dask.array<array, shape=(6, 6), dtype=int32, chunksize=(2, 3), chunktype=numpy.ndarray>"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 3.每块第一个维度为1，第二个维度为6 (6/1)*(6/6)=6块\n",
    "darr = da.from_array(arr3, chunks=(1,6))\n",
    "print(darr.chunks)\n",
    "\n",
    "# 4.第一个维度分为2和4，第二个维度分为3和3 \n",
    "darr = da.from_array(arr3, chunks=((2,4),(3,3)))\n",
    "print(darr.chunks)\n",
    "\n",
    "# 5.第一个维度分为2,2,1,1，第二个维度分为3,2,1\n",
    "darr = da.from_array(arr3, chunks=((2,2,1,1),(3,2,1)))\n",
    "print(darr.chunks)\n",
    "darr"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dac1f08e-2a28-472d-8f4f-b25ee8b3df05",
   "metadata": {},
   "source": [
    "## 指定chunks的几个建议"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "32d6e8d7-59b0-44b1-bdcd-9f4ccb073b44",
   "metadata": {},
   "source": [
    "* 1.每一个chunk应该足够小以适应内存，因为会有很多chunk被一次性读入内存。\n",
    "\n",
    "* 2.每一个chunk也应该足够大，至少在它身上花费的计算时间（建议至少100ms）要显著大于调度这个任务花费的时间（1ms）。\n",
    "\n",
    "* 3.每个chunk的大小通常在10MB-1GB之间，可以根据可用的内存和计算耗时适当调整。\n",
    "* 4.chunks应该和你选择的计算任务保持一致。\n",
    "  * For example, if you plan to frequently slice along a particular dimension, then it’s more efficient if your chunks are aligned so that you have to touch fewer chunks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "3d197b87-3650-4366-923f-e2eb143a8608",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <tr>\n",
       "        <td>\n",
       "            <table style=\"border-collapse: collapse;\">\n",
       "                <thead>\n",
       "                    <tr>\n",
       "                        <td> </td>\n",
       "                        <th> Array </th>\n",
       "                        <th> Chunk </th>\n",
       "                    </tr>\n",
       "                </thead>\n",
       "                <tbody>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Bytes </th>\n",
       "                        <td> 144 B </td>\n",
       "                        <td> 48 B </td>\n",
       "                    </tr>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Shape </th>\n",
       "                        <td> (6, 6) </td>\n",
       "                        <td> (6, 2) </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Dask graph </th>\n",
       "                        <td colspan=\"2\"> 3 chunks in 1 graph layer </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Data type </th>\n",
       "                        <td colspan=\"2\"> int32 numpy.ndarray </td>\n",
       "                    </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "        </td>\n",
       "        <td>\n",
       "        <svg width=\"170\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
       "\n",
       "  <!-- Horizontal lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"0\" y1=\"120\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Vertical lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"40\" y1=\"0\" x2=\"40\" y2=\"120\" />\n",
       "  <line x1=\"80\" y1=\"0\" x2=\"80\" y2=\"120\" />\n",
       "  <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Colored Rectangle -->\n",
       "  <polygon points=\"0.0,0.0 120.0,0.0 120.0,120.0 0.0,120.0\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
       "\n",
       "  <!-- Text -->\n",
       "  <text x=\"60.000000\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >6</text>\n",
       "  <text x=\"140.000000\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,140.000000,60.000000)\">6</text>\n",
       "</svg>\n",
       "        </td>\n",
       "    </tr>\n",
       "</table>"
      ],
      "text/plain": [
       "dask.array<array, shape=(6, 6), dtype=int32, chunksize=(6, 2), chunktype=numpy.ndarray>"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 沿着第二维度的位置1切片\n",
    "arr3 = np.array([[1, 2, 3, 4, 5, 6],\n",
    "        [7, 8, 9, 0, 1, 2],\n",
    "        [3, 4, 5, 6, 7, 8],\n",
    "        [9, 0, 1, 2, 3, 4],\n",
    "        [5, 6, 7, 8, 9, 0],\n",
    "        [1, 2, 3, 4, 5, 6]])\n",
    "# 第一维度设置chunk的步长为6\n",
    "data = da.from_array(arr3, chunks=(6,2))\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "c9e4a0ad-5bab-4f8e-b799-8902c8f1a1e3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1, 2, 3, 4, 5, 6],\n",
       "       [7, 8, 9, 0, 1, 2],\n",
       "       [3, 4, 5, 6, 7, 8],\n",
       "       [9, 0, 1, 2, 3, 4],\n",
       "       [5, 6, 7, 8, 9, 0],\n",
       "       [1, 2, 3, 4, 5, 6]])"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.compute()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "52038ea4-7ad3-47bf-89f2-d4e4a72d3f14",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wall time: 1.07 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([2, 8, 4, 0, 6, 2])"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 切片，沿着第二维度的位置1切片，只会touch一个chunk\n",
    "%time data[:,1].compute()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "8c3f9f66-c23e-4c72-9cc8-e8db4e4f0fb3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <tr>\n",
       "        <td>\n",
       "            <table style=\"border-collapse: collapse;\">\n",
       "                <thead>\n",
       "                    <tr>\n",
       "                        <td> </td>\n",
       "                        <th> Array </th>\n",
       "                        <th> Chunk </th>\n",
       "                    </tr>\n",
       "                </thead>\n",
       "                <tbody>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Bytes </th>\n",
       "                        <td> 144 B </td>\n",
       "                        <td> 8 B </td>\n",
       "                    </tr>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Shape </th>\n",
       "                        <td> (6, 6) </td>\n",
       "                        <td> (1, 2) </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Dask graph </th>\n",
       "                        <td colspan=\"2\"> 18 chunks in 1 graph layer </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Data type </th>\n",
       "                        <td colspan=\"2\"> int32 numpy.ndarray </td>\n",
       "                    </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "        </td>\n",
       "        <td>\n",
       "        <svg width=\"170\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
       "\n",
       "  <!-- Horizontal lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"0\" y1=\"20\" x2=\"120\" y2=\"20\" />\n",
       "  <line x1=\"0\" y1=\"40\" x2=\"120\" y2=\"40\" />\n",
       "  <line x1=\"0\" y1=\"60\" x2=\"120\" y2=\"60\" />\n",
       "  <line x1=\"0\" y1=\"80\" x2=\"120\" y2=\"80\" />\n",
       "  <line x1=\"0\" y1=\"100\" x2=\"120\" y2=\"100\" />\n",
       "  <line x1=\"0\" y1=\"120\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Vertical lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"40\" y1=\"0\" x2=\"40\" y2=\"120\" />\n",
       "  <line x1=\"80\" y1=\"0\" x2=\"80\" y2=\"120\" />\n",
       "  <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Colored Rectangle -->\n",
       "  <polygon points=\"0.0,0.0 120.0,0.0 120.0,120.0 0.0,120.0\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
       "\n",
       "  <!-- Text -->\n",
       "  <text x=\"60.000000\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >6</text>\n",
       "  <text x=\"140.000000\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,140.000000,60.000000)\">6</text>\n",
       "</svg>\n",
       "        </td>\n",
       "    </tr>\n",
       "</table>"
      ],
      "text/plain": [
       "dask.array<array, shape=(6, 6), dtype=int32, chunksize=(1, 2), chunktype=numpy.ndarray>"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 第一维度的步长设为1，所以纵轴方向会被切成6块\n",
    "data = da.from_array(arr3, chunks=(1,2))\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "4079e0bc-b312-490c-8a64-b0d7eb946786",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wall time: 1.88 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([2, 8, 4, 0, 6, 2])"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 沿着第二维度的位置1切片，会touch六个chunk\n",
    "%time data[:,1].compute()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d3bbd711-c27b-422d-a6d5-a2462e3ebdf5",
   "metadata": {},
   "source": [
    "结论：可以看到，两个相同的array，使用不同的chunks，进行相同的切片，第一个切片只会touch一个chunk，第二个切片会touch六个chunk，得到的结果是一样的，但是第二个明显比第一个耗时一些。"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6dd9b736-5d82-40a2-a73f-dd5e608c9e01",
   "metadata": {},
   "source": [
    "## Rechunking\n",
    "为了适应不同的操作，对于同一个array，可以对它进行rechunk操作，以让当前执行的操作获取最高的效率。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "74438878-38f6-494f-a334-dcbe4acbc7e6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <tr>\n",
       "        <td>\n",
       "            <table style=\"border-collapse: collapse;\">\n",
       "                <thead>\n",
       "                    <tr>\n",
       "                        <td> </td>\n",
       "                        <th> Array </th>\n",
       "                        <th> Chunk </th>\n",
       "                    </tr>\n",
       "                </thead>\n",
       "                <tbody>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Bytes </th>\n",
       "                        <td> 144 B </td>\n",
       "                        <td> 16 B </td>\n",
       "                    </tr>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Shape </th>\n",
       "                        <td> (6, 6) </td>\n",
       "                        <td> (2, 2) </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Dask graph </th>\n",
       "                        <td colspan=\"2\"> 9 chunks in 2 graph layers </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Data type </th>\n",
       "                        <td colspan=\"2\"> int32 numpy.ndarray </td>\n",
       "                    </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "        </td>\n",
       "        <td>\n",
       "        <svg width=\"170\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
       "\n",
       "  <!-- Horizontal lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"0\" y1=\"40\" x2=\"120\" y2=\"40\" />\n",
       "  <line x1=\"0\" y1=\"80\" x2=\"120\" y2=\"80\" />\n",
       "  <line x1=\"0\" y1=\"120\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Vertical lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"40\" y1=\"0\" x2=\"40\" y2=\"120\" />\n",
       "  <line x1=\"80\" y1=\"0\" x2=\"80\" y2=\"120\" />\n",
       "  <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Colored Rectangle -->\n",
       "  <polygon points=\"0.0,0.0 120.0,0.0 120.0,120.0 0.0,120.0\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
       "\n",
       "  <!-- Text -->\n",
       "  <text x=\"60.000000\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >6</text>\n",
       "  <text x=\"140.000000\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,140.000000,60.000000)\">6</text>\n",
       "</svg>\n",
       "        </td>\n",
       "    </tr>\n",
       "</table>"
      ],
      "text/plain": [
       "dask.array<rechunk-merge, shape=(6, 6), dtype=int32, chunksize=(2, 2), chunktype=numpy.ndarray>"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = data.rechunk(chunks=2)\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "80ad9596-960c-43c7-8d0b-a2efbbc8e7e9",
   "metadata": {},
   "source": [
    "## Automatic Chunking\n",
    "在指定chunk的shape时，除了指定一个具体的值，还可以传入一个特别的参数---auto，传入这个参数，dask会自动分配一个理想的shape大小。\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "bc32f7b6-9a20-4ce6-8d49-26fb50ad540a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <tr>\n",
       "        <td>\n",
       "            <table style=\"border-collapse: collapse;\">\n",
       "                <thead>\n",
       "                    <tr>\n",
       "                        <td> </td>\n",
       "                        <th> Array </th>\n",
       "                        <th> Chunk </th>\n",
       "                    </tr>\n",
       "                </thead>\n",
       "                <tbody>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Bytes </th>\n",
       "                        <td> 144 B </td>\n",
       "                        <td> 144 B </td>\n",
       "                    </tr>\n",
       "                    \n",
       "                    <tr>\n",
       "                        <th> Shape </th>\n",
       "                        <td> (6, 6) </td>\n",
       "                        <td> (6, 6) </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Dask graph </th>\n",
       "                        <td colspan=\"2\"> 1 chunks in 3 graph layers </td>\n",
       "                    </tr>\n",
       "                    <tr>\n",
       "                        <th> Data type </th>\n",
       "                        <td colspan=\"2\"> int32 numpy.ndarray </td>\n",
       "                    </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "        </td>\n",
       "        <td>\n",
       "        <svg width=\"170\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
       "\n",
       "  <!-- Horizontal lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"0\" y1=\"120\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Vertical lines -->\n",
       "  <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "  <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
       "\n",
       "  <!-- Colored Rectangle -->\n",
       "  <polygon points=\"0.0,0.0 120.0,0.0 120.0,120.0 0.0,120.0\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
       "\n",
       "  <!-- Text -->\n",
       "  <text x=\"60.000000\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >6</text>\n",
       "  <text x=\"140.000000\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,140.000000,60.000000)\">6</text>\n",
       "</svg>\n",
       "        </td>\n",
       "    </tr>\n",
       "</table>"
      ],
      "text/plain": [
       "dask.array<rechunk-merge, shape=(6, 6), dtype=int32, chunksize=(6, 6), chunktype=numpy.ndarray>"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = data.rechunk(chunks='auto')\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e78f1f6c-0806-4208-9dc5-3d9a8ffe4cd8",
   "metadata": {},
   "source": [
    "这个\"理想\"的shape其实和一个配置项---array.chunk-size有关系，这个配置项的默认值是128MiB，当传入auto时，dask进行chunking时会尝试使chunk的大小和这个值接近；\n",
    "但是也会根据实际情况找到合适的chunk（比如实际array的大小远小于这个值，就不能依据这个值来判断）。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "57dbc7bb-f5d7-434c-9146-6696873e2b19",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'128MiB'"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import dask\n",
    "dask.config.get('array.chunk-size')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
